This script is meant to analyze the characteristics of the food waste data.
# Load the library
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr 1.1.4 ✔ readr 2.1.5
## ✔ forcats 1.0.0 ✔ stringr 1.5.1
## ✔ ggplot2 3.5.1 ✔ tibble 3.2.1
## ✔ lubridate 1.9.3 ✔ tidyr 1.3.1
## ✔ purrr 1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(ggplot2)
library(reshape2)
##
## Attaching package: 'reshape2'
##
## The following object is masked from 'package:tidyr':
##
## smiths
library(plotly)
## Warning: package 'plotly' was built under R version 4.4.3
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:stats':
##
## filter
##
## The following object is masked from 'package:graphics':
##
## layout
library(vegan)
## Warning: package 'vegan' was built under R version 4.4.3
## Loading required package: permute
## Warning: package 'permute' was built under R version 4.4.3
## Loading required package: lattice
library(Polychrome)
## Warning: package 'Polychrome' was built under R version 4.4.3
# Use a black-and-white ggplot2 theme with a larger base font for all figures
theme_set(
  theme_bw(base_size = 16)
)
### bring in the data
# header = TRUE and sep = "," are already the read.csv() defaults
df <- read.csv("Dataset/Food_waste_characterization.csv")
As always, we should look at the dimensions and check that everything loaded properly.
# Confirm the table has the expected shape (rows, columns)
dim(df)
## [1] 88 17
# List every column with its type and first few values
glimpse(df)
## Rows: 88
## Columns: 17
## $ num <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 1…
## $ G.num <chr> "DP.1", "DP.2", "DP.3", "DP.4", "DP.5", "DP.6", "FOG…
## $ Food.Wastes <chr> "Cheese", "Milk", "Baby milk", "Yogurt/yogurt drink"…
## $ Food.Wastes.Clean <chr> "Cheese", "Milk", "Baby_milk", "Yogurt_yogurt_drink"…
## $ Group <chr> "DP", "DP", "DP", "DP", "DP", "DP", "FOG", "FOG", "F…
## $ pH <dbl> 5.93, 6.76, 7.15, 4.30, 6.37, 7.23, 3.50, 3.20, 3.01…
## $ TS.Perc <dbl> 49.86, 10.96, 11.38, 30.90, 39.56, 88.58, 75.40, 99.…
## $ VS.Perc <dbl> 42.63, 10.56, 10.89, 30.30, 37.27, 88.22, 71.63, 98.…
## $ TOC <dbl> 29.80, 5.39, 4.41, 12.80, 51.00, 74.90, 86.10, 74.63…
## $ TKN <dbl> 2.90, 1.66, 1.95, 0.91, 1.54, 0.51, 0.35, 0.22, 0.69…
## $ Fat.Perc <dbl> 23.20, 15.60, 23.80, 5.10, 19.50, 83.40, 100.00, 100…
## $ Protein.Perc <dbl> 18.50, 33.40, 10.35, 14.30, 4.50, 3.27, 0.00, 0.00, …
## $ Carbohydrate.Perc <dbl> 58.30, 51.00, 65.85, 80.60, 76.00, 13.33, 0.00, 0.00…
## $ TP <dbl> 1.14, 1.70, 1.06, 0.70, 1.09, 2.30, 0.01, 0.00, 0.00…
## $ TK <dbl> 0.17, 1.10, 1.69, 0.14, 0.60, 1.50, 0.00, 0.01, 0.00…
## $ C.N.Mixture <dbl> 17.38, 3.32, 17.30, 14.06, 17.31, 17.38, 48.50, 26.9…
## $ BMP <dbl> 561.0, 231.0, 315.0, 450.0, 591.0, 660.0, 586.0, 648…
# Looks good: 88 rows and 17 columns, with the measurement columns read as
# numeric.
# Let's also get a per-column summary for completeness.
summary(df)
## num G.num Food.Wastes Food.Wastes.Clean
## Min. : 1.00 Length:88 Length:88 Length:88
## 1st Qu.:22.75 Class :character Class :character Class :character
## Median :44.50 Mode :character Mode :character Mode :character
## Mean :44.50
## 3rd Qu.:66.25
## Max. :88.00
## Group pH TS.Perc VS.Perc
## Length:88 Min. :2.630 Min. : 1.52 Min. : 1.51
## Class :character 1st Qu.:4.965 1st Qu.:19.64 1st Qu.:15.94
## Mode :character Median :6.085 Median :40.08 Median :31.70
## Mean :5.772 Mean :48.88 Mean :40.86
## 3rd Qu.:6.737 3rd Qu.:88.58 3rd Qu.:69.71
## Max. :7.850 Max. :99.95 Max. :99.75
## TOC TKN Fat.Perc Protein.Perc
## Min. : 3.50 Min. :0.0400 Min. : 0.000 Min. : 0.000
## 1st Qu.:12.50 1st Qu.:0.8225 1st Qu.: 1.075 1st Qu.: 7.168
## Median :29.29 Median :1.6000 Median : 7.300 Median :15.320
## Mean :30.94 Mean :1.9482 Mean : 26.566 Mean :16.221
## 3rd Qu.:44.62 3rd Qu.:2.8925 3rd Qu.: 55.733 3rd Qu.:23.773
## Max. :86.10 Max. :6.1500 Max. :100.000 Max. :43.200
## Carbohydrate.Perc TP TK C.N.Mixture
## Min. : 0.00 Min. :0.0000 Min. : 0.000 Min. : 2.08
## 1st Qu.:18.38 1st Qu.:0.1775 1st Qu.: 0.180 1st Qu.:16.12
## Median :69.86 Median :0.4900 Median : 0.730 Median :18.02
## Mean :57.11 Mean :0.6369 Mean : 1.387 Mean :19.46
## 3rd Qu.:82.81 3rd Qu.:0.9425 3rd Qu.: 2.138 3rd Qu.:21.90
## Max. :99.40 Max. :3.6600 Max. :10.450 Max. :48.50
## BMP
## Min. : 216.0
## 1st Qu.: 372.2
## Median : 440.0
## Mean : 471.6
## 3rd Qu.: 524.8
## Max. :1476.0
The first thing we want to do is understand the mass of each substrate component in each food waste item. For this we assume that TS.Perc (total solids) represents the fraction of the sample mass that is digestible substrate; from there we can calculate the mass of each of the components.
Example (with the percentages expressed as fractions):
Mass Carbohydrate = (Standard Mass (1000 g) * TS.Perc) * Carbohydrate.Perc
# Rescale the percentage columns (0-100) to fractions (0-1) so they can be
# used directly as multipliers below.
df.p <- df %>%
  mutate(
    across(
      c(TS.Perc, VS.Perc, Fat.Perc, Protein.Perc, Carbohydrate.Perc),
      \(x) x / 100
    )
  )
# Standard sample mass. Despite the name, `kg` holds the value in grams
# (1000 g), so every *.mass column below is in grams per 1000 g of sample.
kg <- 1000
# Solids mass first, then split it into the three components; mutate()
# evaluates its arguments in order, so TS.mass is available to the others.
df.p <- df.p %>%
  mutate(
    TS.mass = kg * TS.Perc,
    Fat.mass = TS.mass * Fat.Perc,
    Protein.mass = TS.mass * Protein.Perc,
    Carbohydrate.mass = TS.mass * Carbohydrate.Perc
  )
With that, we can start making plots to visualize the different food wastes.
We will begin with histograms showing the frequency distribution of each component's percentage across the food wastes.
# Frequency distribution of each component across all food waste items.
# The columns in df.p hold fractions (0-1), not percentages, so the x axes
# run from 0 to 1.
hist(df.p$Fat.Perc)
hist(df.p$Protein.Perc)
hist(df.p$Carbohydrate.Perc)
We can also melt the data into long format so it is easier to use in ggplot.
# Long format: one row per food waste item x component, with the component
# name in `variable` and its fraction in `value`.
df.m <- df.p %>%
  select(
    G.num, Group, Food.Wastes,
    Fat.Perc, Protein.Perc, Carbohydrate.Perc
  ) %>%
  melt(id.vars = c("G.num", "Food.Wastes", "Group"))
# Each Group contains several food waste items. With stat = "identity" all
# of a group's items are drawn at the same dodge position on top of each
# other, so only the tallest bar is visible. Plot the per-group mean of each
# component instead.
ggplot(data = df.m, aes(x = Group, y = value, fill = variable)) +
  stat_summary(fun = "mean", geom = "bar", position = "dodge") +
  labs(y = "Mean fraction of total solids", fill = "Component")
# Long format of the component masses: one row per food waste item x
# component, with the component name in `variable` and its mass in `value`.
df.mm <- df.p %>%
  select(
    G.num, Group, Food.Wastes,
    Fat.mass, Protein.mass, Carbohydrate.mass
  ) %>%
  melt(id.vars = c("G.num", "Food.Wastes", "Group"))
# Each Group contains several food waste items. With stat = "identity" all
# of a group's items are drawn at the same dodge position on top of each
# other, so only the tallest bar is visible. Plot the per-group mean of each
# component instead.
ggplot(data = df.mm, aes(x = Group, y = value, fill = variable)) +
  stat_summary(fun = "mean", geom = "bar", position = "dodge") +
  labs(y = "Mean mass (g per 1000 g of food waste)", fill = "Component")
# 3D scatter plots of the three macronutrient components, coloured by Group.
#
# For discrete colours plot_ly() falls back to the RColorBrewer "Set2"
# palette, which only has 8 colours; with more groups than that it warns
# "n too large" on every plot. Build one explicit colour per group instead
# and pass it through `colors`.
group_levels <- sort(unique(df.p$Group))
# createPalette() samples candidate colours at random; fix the seed so the
# colours are the same on every run.
set.seed(666)
group_colours <- setNames(
  createPalette(length(group_levels), c("#ff0000", "#00ff00", "#0000ff")),
  group_levels
)

# Component masses (g per 1000 g of food waste)
plot_ly(
  df.p,
  x = ~Fat.mass, y = ~Protein.mass, z = ~Carbohydrate.mass,
  color = ~Group, colors = group_colours,
  type = "scatter3d", mode = "markers"
)

# Component fractions
plot_ly(
  df.p,
  x = ~Fat.Perc, y = ~Protein.Perc, z = ~Carbohydrate.Perc,
  color = ~Group, colors = group_colours,
  type = "scatter3d", mode = "markers"
)

# Same fractions with explicit axis titles. The columns in df.p are
# fractions (0-1), so format the ticks as percentages to match the titles.
fig <- plot_ly(
  df.p,
  x = ~Carbohydrate.Perc, y = ~Protein.Perc, z = ~Fat.Perc,
  color = ~Group, colors = group_colours
)
fig <- fig %>% add_markers()
fig <- fig %>% layout(
  scene = list(
    xaxis = list(title = '% Carbohydrates', tickformat = ".0%"),
    yaxis = list(title = '% Proteins', tickformat = ".0%"),
    zaxis = list(title = '% Fat', tickformat = ".0%")
  )
)
fig
# Ordinate the food wastes by macronutrient composition with NMDS.
# `pc` keeps the identifier columns next to the three composition columns;
# only the composition columns go into the distance calculation.
pc <- select(
  df,
  G.num, Food.Wastes, Group,
  Fat.Perc, Protein.Perc, Carbohydrate.Perc
)
com <- pc[, c("Fat.Perc", "Protein.Perc", "Carbohydrate.Perc")]
m_com <- as.matrix(com)
# Fix the RNG so the random starts used by metaMDS() are reproducible
set.seed(666)
# NOTE(review): metaMDS() auto-transforms its input by default, and the run
# log shows a square-root transformation plus Wisconsin double
# standardization being applied. Those heuristics target community count
# data; confirm they are intended for composition percentages, or pass
# autotransform = FALSE.
nmds <- metaMDS(m_com, distance = "bray")
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.02537536
## Run 1 stress 0.02537532
## ... New best solution
## ... Procrustes: rmse 3.78026e-05 max resid 0.0001308946
## ... Similar to previous best
## Run 2 stress 0.02537538
## ... Procrustes: rmse 3.490184e-05 max resid 0.0001190826
## ... Similar to previous best
## Run 3 stress 0.02537537
## ... Procrustes: rmse 4.19713e-05 max resid 0.0001460885
## ... Similar to previous best
## Run 4 stress 0.02537538
## ... Procrustes: rmse 3.000317e-05 max resid 0.0001042045
## ... Similar to previous best
## Run 5 stress 0.02537532
## ... Procrustes: rmse 8.910618e-06 max resid 3.048899e-05
## ... Similar to previous best
## Run 6 stress 0.02537533
## ... Procrustes: rmse 2.542539e-05 max resid 8.824816e-05
## ... Similar to previous best
## Run 7 stress 0.02537537
## ... Procrustes: rmse 2.848793e-05 max resid 0.0001014179
## ... Similar to previous best
## Run 8 stress 0.02537543
## ... Procrustes: rmse 4.703526e-05 max resid 0.000162448
## ... Similar to previous best
## Run 9 stress 0.02537536
## ... Procrustes: rmse 2.941745e-05 max resid 0.0001020464
## ... Similar to previous best
## Run 10 stress 0.02537534
## ... Procrustes: rmse 2.198387e-05 max resid 7.540184e-05
## ... Similar to previous best
## Run 11 stress 0.0253754
## ... Procrustes: rmse 5.040864e-05 max resid 0.0001744206
## ... Similar to previous best
## Run 12 stress 0.02537541
## ... Procrustes: rmse 4.396229e-05 max resid 0.0001511795
## ... Similar to previous best
## Run 13 stress 0.02537541
## ... Procrustes: rmse 4.147091e-05 max resid 0.000142173
## ... Similar to previous best
## Run 14 stress 0.02537532
## ... Procrustes: rmse 5.970192e-06 max resid 2.544822e-05
## ... Similar to previous best
## Run 15 stress 0.0253754
## ... Procrustes: rmse 5.032362e-05 max resid 0.0001750842
## ... Similar to previous best
## Run 16 stress 0.02537542
## ... Procrustes: rmse 4.644215e-05 max resid 0.0001589299
## ... Similar to previous best
## Run 17 stress 0.02537537
## ... Procrustes: rmse 3.89534e-05 max resid 0.0001352629
## ... Similar to previous best
## Run 18 stress 0.02537541
## ... Procrustes: rmse 4.509028e-05 max resid 0.000156938
## ... Similar to previous best
## Run 19 stress 0.02537537
## ... Procrustes: rmse 3.0038e-05 max resid 0.0001037159
## ... Similar to previous best
## Run 20 stress 0.0253754
## ... Procrustes: rmse 4.138958e-05 max resid 0.0001415059
## ... Similar to previous best
## *** Best solution repeated 20 times
# Print the fitted ordination (call, transformations, dimensions, stress)
nmds
##
## Call:
## metaMDS(comm = m_com, distance = "bray")
##
## global Multidimensional Scaling using monoMDS
##
## Data: wisconsin(sqrt(m_com))
## Distance: bray
##
## Dimensions: 2
## Stress: 0.02537532
## Stress type 1, weak ties
## Best solution was repeated 20 times in 20 tries
## The best solution was from try 1 (random start)
## Scaling: centring, PC rotation, halfchange scaling
## Species: expanded scores based on 'wisconsin(sqrt(m_com))'
# Quick base-graphics overview of the ordination
plot(nmds)
# Extract the site (food waste item) coordinates. Requesting
# display = "sites" returns the score matrix directly, rather than relying
# on scores() returning a list with a `$sites` element, which depends on
# the installed vegan version.
data.scores <- as.data.frame(scores(nmds, display = "sites"))
# Re-attach the identifiers; the ordination rows are in the same order as
# `pc`, from which the input matrix was built.
data.scores$G.num <- pc$G.num
data.scores$Food.Wastes <- pc$Food.Wastes
data.scores$Group <- pc$Group
# NMDS ordination, one point per food waste item, coloured by Group using
# the default ggplot2 colour scale
data.scores %>%
  ggplot(aes(x = NMDS1, y = NMDS2)) +
  geom_point(aes(colour = Group), size = 3)
# List the distinct groups to see how many colours the palette needs
unique(data.scores$Group)
## [1] "DP" "FOG" "IC" "FAV" "CCG" "CP" "BW" "MP" "FP" "EP"
## [11] "SSG" "SS" "BEV" "RERW" "OT"
# Build one visually distinct colour per group. The palette size follows
# the data instead of a hard-coded 15, so the plot keeps working if groups
# are added or removed.
group_levels <- sort(unique(data.scores$Group))
P15 <- createPalette(
  length(group_levels),
  c("#ff0000", "#00ff00", "#0000ff")
)
# Name the colours by group so each group's colour is explicit rather than
# depending on the order in which the scale happens to sort its levels.
group_palette <- setNames(unname(P15), group_levels)
ggplot(data.scores, aes(x = NMDS1, y = NMDS2)) +
  geom_point(aes(colour = Group), size = 3) +
  scale_colour_manual(values = group_palette)